/* 
*** Welfare estimation with new regression approach 
	// input:  estlevel_dataset.dta
	// output: 
	 - welfare_data.dta 
	 - welfare_FINAL.xlsx (sheets: estimates, se_estimates, se) 
	 - TableF1.xlsx 

PART 1: Prepare the data and create quantities of interest 
PART 2: Get welfare estimates 
PART 3: Bootstrap to get SE 
PART 4: Combine in table 
PART 5: Robustness of women's welfare to changes in eta 
*/



capture log close
capture log using "$logs/welfare_log_withSE", replace 


	// Start from the establishment-level dataset 
	use "$files/estlevel_dataset.dta", clear


********************************************************************************
*** DATA PREP 
********************************************************************************

	* Sample restrictions
	*********************************************************************************

	// RESTRICTION 1: keep years 2012-2017; post flags the years 2015-2017 
	keep if inrange(year, 2012, 2017) 
	gen post = inrange(year, 2015, 2017)

	// RESTRICTION 2: establishment has exit==0 rows in both the pre and the post period 
	// The scratch variables are tempvars: they are only needed to build hasprepost. 
	tempvar active_post first_seen last_seen

	// post on rows with exit==0, missing otherwise, so min/max below ignore the other rows 
	gen `active_post' = post if exit==0
	egen `first_seen' = min(`active_post'), by(fakeid_estab)
	egen `last_seen'  = max(`active_post'), by(fakeid_estab)

	// min and max differ only when both a post==0 and a post==1 row with exit==0 exist 
	gen hasprepost = (`first_seen' != `last_seen') 
	tab hasprepost
	drop if hasprepost != 1


	// RESTRICTION 3: baseline flags bl_covered, bl_signing and bl_bothFM must all equal 1 
	keep if bl_covered==1 & bl_signing==1 & bl_bothFM==1



	* Aggregate data in 2 periods: before and after the reform = collapse at pre-post level 
	******************************************************************************************
	// One row per establishment x treat x post, holding period means of employment and wages 
	collapse (mean) empfem_cbage=cbage_fem avgwagefemcbage_est=wage_cbage_fem  ///
					empfem=tot_fem avgwagefem_est=wage_tot_fem  ///
					empmal_cbage=cbage_mal avgwagemalcbage_est=wage_cbage_mal  ///
					empmal=tot_mal avgwagemal_est=wage_tot_mal ///
			 (first) bl_singest bl_bothFM bl_signing bl_covered ///
					, by(fakeid_estab treat post) fast

	// Harmonise names so that emp`g' / avgwage`g'_est share the same group suffix 
	rename empfem_cbage empfemcbage
	rename empmal_cbage empmalcbage
		
	local wgroups femcbage malcbage fem mal 
	foreach g of local wgroups {
		
		// Tag estab whose group wage is missing in every period -> won't be counted in estimation 
		// wage_missing is 0/1, so its within-establishment minimum is 1 only if all rows are missing 
		gen wage_missing = (avgwage`g'_est == .)
		bys fakeid_estab: egen all_missing = min(wage_missing)
		gen hasno_`g' = (all_missing == 1) 
		drop wage_missing all_missing 

		// Zero average employment becomes 0.5 for establishments that are kept in the estimation 
		replace emp`g' = 0.5 if emp`g'== 0 & hasno_`g' == 0
		
		// Keep counterfactual wages constant within each estab: 
		// a missing wage in one period is filled with the same establishment's wage from the other period. 
		// The by-prefix confines [_n-1] / [_n+1] to the establishment's own rows, so a wage can 
		// never be copied from a neighbouring establishment when a period row is absent. 
		bys fakeid_estab (post): replace avgwage`g'_est = avgwage`g'_est[_n-1] ///
			if avgwage`g'_est == . & post == 1 & avgwage`g'_est[_n-1] != . & hasno_`g' == 0 
		bys fakeid_estab (post): replace avgwage`g'_est = avgwage`g'_est[_n+1] ///
			if avgwage`g'_est == . & post == 0 & avgwage`g'_est[_n+1] != . & hasno_`g' == 0 

	}


	
	* Quantities of interest	
	***********************************
	local groups femcbage fem malcbage  mal 
	
	foreach g of local groups {
			
			* w_k and n_k: establishment average wage and employment in each period 
			rename (avgwage`g'_est emp`g') (w_k_`g' n_k_`g')
			
			* log w_kt; must be missing wherever hasno_`g' == 1 
			gen log_w_k_`g' = ln(w_k_`g')
			assert log_w_k_`g'==. if hasno_`g' == 1
			
			* wage bill w_k * n_k 
			gen wn_k_`g' = n_k_`g'*w_k_`g'
			
			* WN: wage bill summed over all establishments of the period 
			bys post: egen WN_`g' = total(wn_k_`g')
			
			* WN_CUT (treat == 1) and WN_NC (treat == 0): period totals within each arm, 
			* copied onto every row of the period via the max() step 
			foreach arm in CUT NC {
				local armval = ("`arm'" == "CUT")
				bys post: egen temp = total(wn_k_`g') if treat == `armval' 
				bys post: egen WN_`arm'_`g' = max(temp)
				drop temp 
			}
			
			* s*_kt and its log (defined only for treat == 0) 
			* (original note: the log is not expected to be missing because of the adjustment made earlier) 
			gen s_star_`g' = wn_k_`g'/WN_NC_`g' if treat == 0 
			gen log_s_star_`g' = ln(s_star_`g')
			
			* calibrated lambda: share of the treat == 0 wage bill in the period total 
			gen lambda_`g' = WN_NC_`g'/WN_`g'
			
			* calibrated ln(lambda_t/lambda_t-1) 
			qui su lambda_`g' if post == 1, meanonly
			local lam_post = r(max)
			qui su lambda_`g' if post == 0, meanonly
			local lam_pre = r(max)
			gen lambda_diff_`g' = ln(`lam_post'/`lam_pre')
			
			* number of treat == 0 pre-period rows with hasno_`g' == 0 (scales both differences below) 
			qui count if treat == 0 & post == 0 & hasno_`g' == 0 
			local N_NC = r(N)
			
			* calibrated ln(w*t/w*t-1): change in the treat == 0 sum of log wages, divided by N_NC 
			bys post treat: egen sumlnw_`g' = total(log_w_k_`g')
			qui su sumlnw_`g' if post == 0 & treat == 0, meanonly
			local lnw_pre = r(max)
			qui su sumlnw_`g' if post == 1 & treat == 0, meanonly
			local lnw_post = r(max)
			gen logw_diff_`g' = (1/`N_NC') * (`lnw_post' - `lnw_pre')
			
			* calibrated ln(s*t/s*t-1): same construction on the log shares 
			bys post treat: egen sumlns_`g' = total(log_s_star_`g')
			qui su sumlns_`g' if post == 0 & treat == 0, meanonly
			local lns_pre = r(max)
			qui su sumlns_`g' if post == 1 & treat == 0, meanonly
			local lns_post = r(max)
			gen log_s_star_diff_`g' = (1/`N_NC') * (`lns_post' - `lns_pre')
				
		} 	
			
				
		* s tilde and s hat for first order approximation 
		* Built on a copy of the data reshaped wide by period, then merged back by establishment x period. 
		preserve
			// variables that get one column per period in the wide reshape 
			local stubs w_k_fem w_k_mal w_k_femcbage w_k_malcbage ///
					n_k_fem n_k_mal n_k_femcbage n_k_malcbage ///
					wn_k_fem wn_k_femcbage wn_k_mal wn_k_malcbage ///
					WN_fem WN_CUT_fem WN_NC_fem WN_femcbage WN_CUT_femcbage ///
					WN_NC_femcbage WN_mal WN_CUT_mal WN_NC_mal WN_malcbage ///
					WN_CUT_malcbage WN_NC_malcbage
					
			// reshape wide: suffix 0 = pre-period value, suffix 1 = post-period value 
			keep `stubs' fakeid_estab treat post
			reshape wide `stubs', i(fakeid_estab treat) j(post)
					
					local groups femcbage fem malcbage  mal 
					foreach g of local groups {
						
						* \tilde{s}_kt - keeping wages fixed at pre-period level 
						* (both periods are divided by the pre-period total WN) 
						gen s_tilde_`g'_k1 = (w_k_`g'0 * n_k_`g'1)/WN_`g'0 
						
						gen s_tilde_`g'_k0 = (w_k_`g'0 * n_k_`g'0)/WN_`g'0 
			
						* \hat{s}_kt - keeping employment fixed at pre-period level 
						gen s_hat_`g'_k1 = (w_k_`g'1 * n_k_`g'0)/WN_`g'0  
						
						gen s_hat_`g'_k0 = (w_k_`g'0 * n_k_`g'0)/WN_`g'0  
							
					}
					
				// reshape long: the trailing 0/1 of each share becomes post again 
				keep fakeid_estab treat s_tilde* s_hat*  
					
				reshape long s_tilde_fem_k s_tilde_femcbage_k s_tilde_mal_k s_tilde_malcbage_k ///
				s_hat_fem_k s_hat_femcbage_k s_hat_mal_k s_hat_malcbage_k, i(fakeid_estab treat) j(post)
				
				// the merge below is 1:1, so the key must be unique 
				isid fakeid_estab post 
				tempfile shares
				save `shares'
				
		restore 
		
		cap drop _merge
		merge 1:1 fakeid_estab post  using `shares'

// save data 
// Forward slash as directory separator: Stata accepts it on every platform, 
// whereas a backslash is only a separator on Windows. 
save "$files/welfare_data.dta", replace 

********************************************************************************
*** ESTIMATES
********************************************************************************
use "$files/welfare_data.dta", clear 
local groups femcbage fem malcbage  mal 

* Regressions: post coefficient from establishment fixed-effects regressions, stored in locals per group 
		foreach g of local groups {
		    
			// estimation samples: NC = treat==0, CUT = treat==1, both limited to hasno_`g' == 0 
			local cond_NC  "if treat==0 & hasno_`g' ==0"
			local cond_CUT "if treat==1 & hasno_`g' ==0"
			
			* Effect on logw of NON CUT (coefficient and its standard error) 
			reghdfe log_w_k_`g' post `cond_NC', absorb(i.fakeid_estab)
			local w_`g' = _b[post]
			local w_se_`g' = _se[post]
			
			* Effect on logs* of NON CUT (coefficient and its standard error) 
			reghdfe log_s_star_`g' post `cond_NC', absorb(i.fakeid_estab)
			local s_`g' = _b[post]
			local s_se_`g' = _se[post]
			
			* Effect on \tilde{s}_kt and \hat{s}_kt, NON CUT then CUT for each 
			* -> locals s_tildeNC_`g', s_tildeCUT_`g', s_hatNC_`g', s_hatCUT_`g' 
			foreach kind in tilde hat {
				foreach arm in NC CUT {
					reghdfe s_`kind'_`g'_k post `cond_`arm'', absorb(i.fakeid_estab)
					local s_`kind'`arm'_`g' = _b[post]
				}
			}
			
		}
				
* Combine the stored coefficients into the welfare quantities, one matrix column per group 
	matrix R = J(9, 4, .) // estimates (row 7 is left empty)
	matrix SE = J(2, 4, .) // regression se of w* and s* 
	
	local eta = 1.015
	local eta_1_inv = -1/(1+`eta')
	
	local c = 1 
		
	foreach g of local groups {
		
		* N_NC: pre-period rows with treat == 0 and hasno_`g' == 0 
		quietly count if treat == 0 & post == 0 & hasno_`g' == 0 
		local N_NC = r(N)
		
		* N_CUT: pre-period rows with treat == 1 and hasno_`g' == 0 
		quietly count if treat == 1 & post == 0 & hasno_`g' == 0 
		local N_CUT = r(N)
		
		* WN_NC, t-1 
		quietly summarize WN_NC_`g' if post == 0 
		local WN_NC = r(mean)
		
		* WN_CUT, t-1 
		quietly summarize WN_CUT_`g' if post == 0 
		local WN_CUT = r(mean)
		
		* ln(lambda_t/lambda_t_1), kept as unevaluated text so each matrix cell below 
		* evaluates exactly the same arithmetic expression 
		local lambda_term "(`WN_CUT'*`N_NC'/`WN_NC')* (`s_tildeNC_`g''+ `s_hatNC_`g'' ) - `N_CUT' *(`s_tildeCUT_`g'' + `s_hatCUT_`g'' )"
		
		* Row 1 - total welfare: wage component + dispersion component + lambda component 
		matrix R[1, `c'] = `w_`g''  + `eta_1_inv'* `s_`g''  + `eta_1_inv'*( `lambda_term' )
		
		* Row 2 - lambda component: - (1/(1+eta)) * ln(lambda_t/lambda_t_1) 
		matrix R[2, `c'] = `eta_1_inv'*( `lambda_term' )
		
		* Row 3 - wage component: ln(w*_t/w*_t_1) 
		matrix R[3, `c'] = `w_`g''
		
		* Row 4 - dispersion component: - (1/(1+eta)) * ln(s*_t/s*_t_1) 
		matrix R[4, `c'] = `eta_1_inv'* `s_`g'' 
		
		* Row 5 - just ln(lambda_t/lambda_t_1) 
		matrix R[5, `c'] = `lambda_term'
		
		* Row 6 - just ln(s*_t/s*_t_1) 
		matrix R[6, `c'] = `s_`g'' 
		
		* Rows 8 and 9 - N_ALL and N_NC 
		matrix R[8, `c'] = `N_NC'+`N_CUT'
		matrix R[9, `c'] = `N_NC'
		
		* Regression standard errors of w* (row 1) and s* (row 2) 
		matrix SE[1, `c'] = `w_se_`g''
		matrix SE[2, `c'] = `s_se_`g'' 
		
		local ++c 
	}
	

	// Point estimates: turn matrix R into a dataset, label the rows and write sheet "estimates" 
	clear 
	svmat  R
	rename (R1 R2 R3 R4) (fem_cbage fem mal_cbage mal)
	
	gen quantity = ""
	// rows 1-6 are labelled in matrix row order; row 7 keeps an empty label 
	local r = 0
	foreach q in welfare lambda_comp wage_comp dispersion_comp lambda dispersion {
		local ++r
		replace quantity = "`q'" in `r'
	}
	replace quantity = "N" in 8 
	replace quantity = "N non CUT" in 9 

	export excel using "$tables/welfare_FINAL.xlsx", ///
	sheet("estimates") firstrow(variables) sheetreplace
	
	// Regression standard errors: same treatment for matrix SE, written to sheet "se_estimates" 
	clear 
	svmat  SE
	rename (SE1 SE2 SE3 SE4) (fem_cbage fem mal_cbage mal)
	
	gen quantity = ""
	replace quantity = "wage_comp" in 1 
	replace quantity = "dispersion" in 2 

	export excel using "$tables/welfare_FINAL.xlsx", ///
	sheet("se_estimates") firstrow(variables) sheetreplace
	
	
*********************************************************************************
*** BOOTSTRAP 
*********************************************************************************
local S = 1000 // total sample draws we want to run

// Fix the random-number seed so the resampling, and therefore the bootstrap 
// standard errors, are identical every time this section is run. 
set seed 12345

forvalues i = 1/`S' {
	
	// load data 
	// (forward slash as directory separator: accepted by Stata on every platform) 
	use "$files/welfare_data.dta", clear 
	
	// sample establishments with replacement (all rows of a drawn establishment are kept together) 
	gsample, cluster(fakeid_estab)
	unique fakeid_estab 
	
	// run regressions: same specifications as for the point estimates, keeping only the post coefficient 
	local groups femcbage fem malcbage  mal 
		foreach g of local groups {
		    
		    
			* Effect on logw of NON CUT
			reghdfe log_w_k_`g'  post if treat==0 & hasno_`g' ==0  , absorb(i.fakeid_estab)
			local w_`g' = _b[post]
			
			* Effect on logs* of NON CUT
			reghdfe log_s_star_`g' post if treat==0 & hasno_`g' ==0 , absorb(i.fakeid_estab)
			local s_`g' = _b[post]
			
			* Effect on \tilde{s}_kt of NON CUT
			reghdfe s_tilde_`g'_k  post if treat==0 & hasno_`g' ==0  , absorb(i.fakeid_estab)
			local s_tildeNC_`g' = _b[post]
	
			* Effect on \tilde{s}_kt of CUT
			reghdfe s_tilde_`g'_k  post if treat==1 & hasno_`g' ==0  , absorb(i.fakeid_estab)
			local s_tildeCUT_`g' = _b[post]
			
			* Effect on \hat{s}_kt of NON CUT
			reghdfe s_hat_`g'_k  post if treat==0 & hasno_`g' ==0 , absorb(i.fakeid_estab)
			local s_hatNC_`g' = _b[post]
			
			* Effect on \hat{s}_kt of CUT
			reghdfe s_hat_`g'_k  post if treat==1 & hasno_`g' ==0  , absorb(i.fakeid_estab)
			local s_hatCUT_`g' = _b[post]
					
		}
		
	// aggregate in welfare quantities (rows 1-6 of the estimates layout, one column per group) 
	mat R = J(6, 4, .)
	local eta = 1.015
	local eta_1_inv = -1/(1+`eta')
	
	local c = 1 
	
	local groups femcbage fem malcbage  mal 
	
	foreach g of local groups {
		
		* N_NC: pre-period rows with treat == 0 in the resampled data 
		qui count if treat == 0 & post == 0 & hasno_`g' == 0 
		local N_NC = r(N)
		
		* N_CUT: pre-period rows with treat == 1 in the resampled data 
		qui count if treat == 1 & post == 0 & hasno_`g' == 0 
		local N_CUT = r(N)
		
		* WN_NC, t-1
		qui su WN_NC_`g' if post == 0 
		local WN_NC = r(mean)
		
		* WN_CUT, t-1
		qui su WN_CUT_`g' if post == 0 
		local WN_CUT = r(mean)
		
		* Total welfare
		mat R[1, `c'] =         `w_`g''  + `eta_1_inv'* `s_`g''  + ///
					`eta_1_inv'*( (`WN_CUT'*`N_NC'/`WN_NC')* (`s_tildeNC_`g''+ `s_hatNC_`g'' ) ///
					- `N_CUT' *(`s_tildeCUT_`g'' + `s_hatCUT_`g'' ))
		
		* Lambda component: - (1/(1+eta)) * ln(lambda_t/lambda_t_1)  
		mat R[2, `c'] = `eta_1_inv'*( (`WN_CUT'*`N_NC'/`WN_NC')* (`s_tildeNC_`g''+ `s_hatNC_`g'' ) ///
					- `N_CUT' *(`s_tildeCUT_`g'' + `s_hatCUT_`g'' ))
		
		* Wage component: ln(w*_t/w*_t_1)  
		mat R[3, `c'] = `w_`g''
		
		* Dispersion component: - (1/(1+eta)) * ln(s*_t/s*_t_1)  
		mat R[4, `c'] = `eta_1_inv'* `s_`g'' 
		
		* just ln(lambda_t/lambda_t_1)   
		mat R[5, `c'] =  (`WN_CUT'*`N_NC'/`WN_NC')* (`s_tildeNC_`g''+ `s_hatNC_`g'' ) ///
					- `N_CUT' *(`s_tildeCUT_`g'' + `s_hatCUT_`g'' )
		
		* just ln(s*_t/s*_t_1)   
		mat R[6, `c'] = `s_`g'' 
		
		local ++c 
		
	}
		
	// store results in new temp dataset: one row per quantity, tagged with the draw number 
	clear 
	svmat R
	rename R1 fem_cbage 
	rename R2 fem 
	rename R3 mal_cbage 
	rename R4 mal 
		
	gen sample = `i' 
	
	gen quantity = ""
	replace quantity = "welfare" in 1 
	replace quantity = "lambda_comp" in 2 
	replace quantity = "wage_comp" in 3 
	replace quantity = "dispersion_comp" in 4 
	replace quantity = "lambda" in 5 
	replace quantity = "dispersion" in 6 
	
	// one tempfile per draw; they are appended once the loop has finished 
	tempfile welfare`i' 
	save `welfare`i''

}

*** Stack the bootstrap draws: start from draw 1 and append draws 2..S 
use `welfare1' , clear 
local i = 2
while `i' <= `S' {
	append using `welfare`i''
	local ++i
}

*** Bootstrap SE = standard deviation across draws, computed within each quantity 
foreach g in fem_cbage fem mal_cbage mal {
	bysort quantity: egen se_`g' = sd(`g')
}

*** Export SE in table: the SE is constant within quantity, so dropping duplicates leaves one row each 
keep quantity se_* 
duplicates drop 

export excel using "$tables/welfare_FINAL.xlsx", ///
	sheet("se") firstrow(variables) sheetreplace

*********************************************************************************
*** MAKE TABLE 
*********************************************************************************	

// Build the final table: for each quantity, the point estimate followed by its 
// bootstrap SE and, where available, its regression SE. 

// import bootstrapped se 
import excel using "$tables/welfare_FINAL.xlsx", ///
	sheet("se") firstrow clear 	
tempfile se 
save `se'

// import estimates se 
import excel using "$tables/welfare_FINAL.xlsx", ///
	sheet("se_estimates") firstrow clear 
// prefix the four group columns with se_es_ so they do not clash with the estimates 
rename fem* se_es_fem*
rename mal* se_es_mal*

tempfile se_estimates 
save `se_estimates'
	
//import estimates
	import excel using "$tables/welfare_FINAL.xlsx", ///
	sheet("estimates") firstrow clear 
	// remember the sheet's row order; it is used to sort after the reshape 
	gen row = _n 
// merge with bootstrapped SE (columns se_*) 
	merge 1:1 quantity using `se' 
	drop _merge 
// merge with regression SE (columns se_es_*) 
	merge 1:1 quantity using `se_estimates' 
	drop _merge 

// reshape 
// Move the statistic tag from prefix to suffix so every column reads <group><tag>: 
//   se_<group>    -> <group>_se      (bootstrap SE) 
//   se_es_<group> -> <group>_se_es   (regression SE) 
rename se_* *_se
rename es_* *_es
// fem / mal are temporarily renamed F / M so that the reshape stubs fem and mal 
// do not also pick up the fem_cbage* / mal_cbage* columns 
rename (fem fem_se mal mal_se fem_se_es mal_se_es) (F F_se M M_se F_se_es M_se_es) 
// point estimates get the suffix _es 
foreach x in fem_cbage F mal_cbage M {
	rename `x' `x'_es
}
// long format: one row per quantity x suffix, with the suffix stored in the string variable _j 
reshape long fem_cbage F mal_cbage M , i(quantity) string
rename (F M )  (fem  mal ) 


order quantity fem_cbage fem mal_cbage mal, first 
// within each original row the suffixes sort as _es, _se, _se_es 
sort row _j


// drop regression-SE rows that carry no value 
drop if _j == "_se_es" & fem == . 
// drop bootstrap-SE rows of quantities whose name contains "N" 
drop if strpos(quantity, "N")>0 &  _j =="_se"

	
// label the SE rows by appending the suffix to the quantity name 
replace quantity = quantity + _j if _j == "_se" | _j == "_se_es"
keep quantity - mal

// NOTE(review): positional drop. With the row layout produced above, observation 16 appears to be 
// the "_se" record of the unlabelled spacer row (row 7 of the estimates sheet) - confirm, and 
// re-check whenever rows are added to or removed from the estimates. 
drop in 16

export excel using "$tables/TableF1.xlsx", ///
	sheet("tableF1") firstrow(variables) sheetreplace
	

cap log close 

